bitkeeper revision 1.608.1.1 (3fba5b99WMvlBA7JwJeGU5vakf_qWg)
author kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Tue, 18 Nov 2003 17:49:13 +0000 (17:49 +0000)
committer kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Tue, 18 Nov 2003 17:49:13 +0000 (17:49 +0000)
event_channel.c:
  new file
sched.h, mm.h, event.h, hypervisor-if.h, domain.c, entry.S:
  Event channels between domains. Also do not reschedule a domain if a particular guest event is already pending.

.rootkeys
xen/arch/i386/entry.S
xen/common/domain.c
xen/common/event_channel.c [new file with mode: 0644]
xen/include/hypervisor-ifs/hypervisor-if.h
xen/include/xeno/event.h
xen/include/xeno/mm.h
xen/include/xeno/sched.h

index b96338a42bd4f4711203d81b93f79ab2353a9a78..d790137e7552b8f325785d3e9d37458595fac6b1 100644 (file)
--- a/.rootkeys
+++ b/.rootkeys
 3ddb79bdYO5D8Av12NHqPeSviav7cg xen/common/domain.c
 3e32af9aRnYGl4GMOaDKp7JdfhOGhg xen/common/domain_page.c
 3ddb79bdeyutmaXEfpQvvxj7eQ0fCw xen/common/event.c
+3fba5b96H0khoxNiKbjdi0inpXV-Pw xen/common/event_channel.c
 3ddb79bd9drcFPVxd4w2GPOIjLlXpA xen/common/kernel.c
 3e4cd9d8LAAghUY0hNIK72uc2ch_Nw xen/common/keyhandler.c
 3ddb79bduhSEZI8xa7IbGQCpap5y2A xen/common/lib.c
index 1c828bc1ab4e2784e37cd5df6d351ff7d46334fc..e06c565de715f87ced5994f8bccf2a0fc6275e49 100644 (file)
@@ -725,6 +725,7 @@ ENTRY(hypervisor_call_table)
         .long SYMBOL_NAME(do_multicall)
         .long SYMBOL_NAME(do_kbd_op)
         .long SYMBOL_NAME(do_update_va_mapping)
+        .long SYMBOL_NAME(do_event_channel_op)
         .rept NR_syscalls-((.-hypervisor_call_table)/4)
         .long SYMBOL_NAME(sys_ni_syscall)
-       .endr
+        .endr
index 9edea30a8b92e59b758cf3edcca8638dff1b5445..2888e62417aa67c84fd72de3fc01d0ec15956438 100644 (file)
@@ -52,6 +52,7 @@ struct task_struct *do_createdomain(unsigned int dom_id, unsigned int cpu)
 
     spin_lock_init(&p->blk_ring_lock);
     spin_lock_init(&p->page_lock);
+    spin_lock_init(&p->event_channel_lock);
 
     p->shared_info = (void *)get_free_page(GFP_KERNEL);
     memset(p->shared_info, 0, PAGE_SIZE);
@@ -288,6 +289,8 @@ void free_all_dom_mem(struct task_struct *p)
 /* Release resources belonging to task @p. */
 void release_task(struct task_struct *p)
 {
+    extern void destroy_event_channels(struct task_struct *);
+
     ASSERT(p->state == TASK_DYING);
     ASSERT(!p->has_cpu);
 
@@ -300,6 +303,7 @@ void release_task(struct task_struct *p)
     destroy_blkdev_info(p);
 
     /* Free all memory associated with this domain. */
+    destroy_event_channels(p);
     free_page((unsigned long)p->mm.perdomain_pt);
     UNSHARE_PFN(virt_to_page(p->shared_info));
     free_page((unsigned long)p->shared_info);
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
new file mode 100644 (file)
index 0000000..052dc79
--- /dev/null
@@ -0,0 +1,338 @@
+/******************************************************************************
+ * event_channel.c
+ * 
+ * Event channels between domains.
+ * 
+ * Copyright (c) 2003, K A Fraser.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/lib.h>
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/event.h>
+
+
+/*
+ * event_channel_open(target_dom):
+ *  Allocate a local channel bound to <target_dom>. If the target domain
+ *  has already opened a channel back to us, the two ends are connected
+ *  and the remote end is notified. Returns 0 on success, -ve errno on
+ *  failure.
+ */
+static long event_channel_open(u16 target_dom)
+{
+    struct task_struct *lp = current, *rp;
+    int                 i, lmax, rmax, lid, rid;
+    event_channel_t    *lchn, *rchn;
+    shared_info_t      *rsi;
+    unsigned long       cpu_mask;
+    long                rc = 0;
+
+    rp = find_domain_by_id(target_dom);
+
+    /* A self-connection would acquire event_channel_lock twice: refuse. */
+    if ( unlikely(rp == lp) )
+    {
+        put_task_struct(rp);
+        return -EINVAL;
+    }
+
+    /*
+     * We need locks at both ends to make a connection. We avoid deadlock
+     * by acquiring the locks in address order.
+     */
+    if ( (unsigned long)lp < (unsigned long)rp )
+    {
+        spin_lock(&lp->event_channel_lock);
+        spin_lock(&rp->event_channel_lock);
+    }
+    else
+    {
+        /* rp may be NULL if the target domain does not (yet) exist. */
+        if ( likely(rp != NULL) )
+            spin_lock(&rp->event_channel_lock);
+        spin_lock(&lp->event_channel_lock);
+    }
+
+    lmax = lp->max_event_channel;
+    lchn = lp->event_channel;
+    lid  = -1;
+
+    /*
+     * Find the first unused event channel. Also ensure no channel already
+     * exists to the specified target domain.
+     */
+    for ( i = 0; i < lmax; i++ )
+    {
+        if ( (lid == -1) && !(lchn[i].flags & ECF_INUSE) )
+        {
+            lid = i;
+        }
+        else if ( unlikely(lchn[i].target_dom == target_dom) )
+        {
+            rc = -EEXIST;
+            goto out;
+        }
+    }
+    
+    /* If there is no free slot we need to allocate a bigger channel list. */
+    if ( unlikely(lid == -1) )
+    {
+        /* Reached maximum channel count? */
+        if ( unlikely(lmax == 1024) )
+        {
+            rc = -ENOSPC;
+            goto out;
+        }
+        
+        lmax = (lmax == 0) ? 4 : (lmax * 2);
+        
+        lchn = kmalloc(lmax * sizeof(event_channel_t), GFP_KERNEL);
+        if ( unlikely(lchn == NULL) )
+        {
+            rc = -ENOMEM;
+            goto out;
+        }
+
+        memset(lchn, 0, lmax * sizeof(event_channel_t));
+        
+        /* Preserve the existing (fully-occupied) channel entries. */
+        if ( likely(lp->event_channel != NULL) )
+        {
+            memcpy(lchn, lp->event_channel,
+                   lp->max_event_channel * sizeof(event_channel_t));
+            kfree(lp->event_channel);
+        }
+
+        /* First free slot is the first one beyond the old array. */
+        lid = lp->max_event_channel;
+
+        lp->event_channel     = lchn;
+        lp->max_event_channel = lmax;
+    }
+
+    lchn[lid].target_dom = target_dom;
+    lchn[lid].flags      = ECF_INUSE;
+
+    if ( likely(rp != NULL) )
+    {
+        rchn = rp->event_channel;
+        rmax = rp->max_event_channel;
+        
+        for ( rid = 0; rid < rmax; rid++ )
+        {
+            if ( (rchn[rid].target_dom == lp->domain) &&
+                 (rchn[rid].flags & ECF_INUSE) )
+            {
+                /*
+                 * The target was awaiting a connection. We make the connection
+                 * and send a connection-made event to the remote end.
+                 */
+                rchn[rid].flags = ECF_INUSE | ECF_CONNECTED | lid;
+                lchn[lid].flags = ECF_INUSE | ECF_CONNECTED | rid;
+
+                /* Set sticky pending bit + selector; notify on 0->1 only. */
+                rsi = rp->shared_info;
+                if ( !test_and_set_bit(rid,    &rsi->event_channel_pend[0]) &&
+                     !test_and_set_bit(rid>>5, &rsi->event_channel_pend_sel) )
+                {
+                    cpu_mask = mark_guest_event(rp, _EVENT_EVTCHN);
+                    guest_event_notify(cpu_mask);
+                }
+
+                break;
+            }
+        }
+    }
+    
+ out:
+    spin_unlock(&lp->event_channel_lock);
+    if ( rp != NULL )
+    {
+        spin_unlock(&rp->event_channel_lock);
+        put_task_struct(rp);
+    }
+
+    return rc;
+}
+
+
+/*
+ * event_channel_close(lid): close local channel <lid>. If the channel is
+ * connected, the remote end reverts to 'disconnected' and is notified via
+ * its event_channel_disc bitmap. Returns 0, or -EINVAL for a bad <lid>.
+ */
+static long event_channel_close(u16 lid)
+{
+    struct task_struct *lp = current, *rp = NULL;
+    event_channel_t    *lchn, *rchn;
+    u16                 rid;
+    shared_info_t      *rsi;
+    unsigned long       cpu_mask;
+    long                rc = 0;
+
+ again:
+    spin_lock(&lp->event_channel_lock);
+
+    lchn = lp->event_channel;
+
+    if ( unlikely(lid >= lp->max_event_channel) || 
+         unlikely(!(lchn[lid].flags & ECF_INUSE)) )
+    {
+        rc = -EINVAL;
+        goto out;
+    }
+
+    if ( lchn[lid].flags & ECF_CONNECTED )
+    {
+        if ( rp == NULL )
+        {
+            rp = find_domain_by_id(lchn[lid].target_dom);
+            ASSERT(rp != NULL);
+            
+            /* Both locks must be taken in address order to avoid deadlock. */
+            if ( (unsigned long)lp < (unsigned long)rp )
+            {
+                spin_lock(&rp->event_channel_lock);
+            }
+            else
+            {
+                /*
+                 * Wrong order: drop our lock, take the remote lock, then
+                 * retry from scratch (channel state may have changed while
+                 * we held neither lock).
+                 */
+                spin_unlock(&lp->event_channel_lock);
+                spin_lock(&rp->event_channel_lock);
+                goto again;
+            }
+        }
+        else if ( rp->domain != lchn[lid].target_dom )
+        {
+            /* Channel was rebound to another domain during the retry. */
+            rc = -EINVAL;
+            goto out;
+        }
+        
+        rchn = rp->event_channel;
+        rid  = lchn[lid].flags & ECF_TARGET_ID;
+        ASSERT(rid < rp->max_event_channel);
+        ASSERT(rchn[rid].flags == (ECF_INUSE | ECF_CONNECTED | lid));
+        ASSERT(rchn[rid].target_dom == lp->domain);
+
+        /* Remote end reverts to 'open but awaiting connection'. */
+        rchn[rid].flags = ECF_INUSE;
+
+        /* Flag the disconnect to the remote guest; notify on 0->1 only. */
+        rsi = rp->shared_info;
+        if ( !test_and_set_bit(rid,    &rsi->event_channel_disc[0]) &&
+             !test_and_set_bit(rid>>5, &rsi->event_channel_disc_sel) )
+        {
+            cpu_mask = mark_guest_event(rp, _EVENT_EVTCHN);
+            guest_event_notify(cpu_mask);
+        }
+    }
+
+    lchn[lid].target_dom = 0;
+    lchn[lid].flags      = 0;
+    
+ out:
+    spin_unlock(&lp->event_channel_lock);
+    if ( rp != NULL )
+    {
+        spin_unlock(&rp->event_channel_lock);
+        put_task_struct(rp);
+    }
+    
+    return rc;
+}
+
+
+/*
+ * event_channel_send(lid): set the sticky 'pending' bit at the remote end
+ * of connected channel <lid>, notifying the remote guest if the bit (and
+ * its selector bit) made a 0->1 transition. Returns 0, or -EINVAL.
+ */
+static long event_channel_send(u16 lid)
+{
+    struct task_struct *lp = current, *rp;
+    event_channel_t    *lchn, *rchn;
+    u16                 rid;
+    shared_info_t      *rsi;
+    unsigned long       cpu_mask;
+
+    spin_lock(&lp->event_channel_lock);
+
+    lchn = lp->event_channel;
+
+    if ( unlikely(lid >= lp->max_event_channel) || 
+         unlikely((lchn[lid].flags & (ECF_INUSE|ECF_CONNECTED)) !=
+                  (ECF_INUSE|ECF_CONNECTED)) )
+    {
+        spin_unlock(&lp->event_channel_lock);
+        return -EINVAL;
+    }
+
+    rid  = lchn[lid].flags & ECF_TARGET_ID;
+    rp   = find_domain_by_id(lchn[lid].target_dom);
+    ASSERT(rp != NULL);
+
+    /*
+     * NOTE(review): once our lock is dropped the remote end may close or
+     * rebind its channel; below we only range-check 'rid' and never
+     * recheck that rchn[rid] still connects back to us — confirm this
+     * (a stale sticky bit) is acceptable.
+     */
+    spin_unlock(&lp->event_channel_lock);
+
+    spin_lock(&rp->event_channel_lock);
+
+    rchn = rp->event_channel;
+
+    if ( unlikely(rid >= rp->max_event_channel) )
+    {
+        spin_unlock(&rp->event_channel_lock);
+        put_task_struct(rp);
+        return -EINVAL;
+    }
+
+    /* Set sticky pending bit + selector; notify on 0->1 transition only. */
+    rsi = rp->shared_info;
+    if ( !test_and_set_bit(rid,    &rsi->event_channel_pend[0]) &&
+         !test_and_set_bit(rid>>5, &rsi->event_channel_pend_sel) )
+    {
+        cpu_mask = mark_guest_event(rp, _EVENT_EVTCHN);
+        guest_event_notify(cpu_mask);
+    }
+
+    spin_unlock(&rp->event_channel_lock);
+    put_task_struct(rp);
+    return 0;
+}
+
+
+/*
+ * event_channel_status(lid): report the state of local channel <lid>.
+ * Returns EVTCHNSTAT_closed (slot unused or out of range),
+ * EVTCHNSTAT_disconnected (in use, awaiting the remote end), or
+ * EVTCHNSTAT_connected (fully connected).
+ */
+static long event_channel_status(u16 lid)
+{
+    struct task_struct *lp = current;
+    event_channel_t    *lchn;
+    long                rc = EVTCHNSTAT_closed;
+
+    spin_lock(&lp->event_channel_lock);
+
+    lchn = lp->event_channel;
+
+    if ( lid < lp->max_event_channel )
+    {
+        /*
+         * INUSE|CONNECTED => connected; INUSE alone => disconnected.
+         * (The original returned these two the wrong way round.)
+         */
+        if ( (lchn[lid].flags & (ECF_INUSE|ECF_CONNECTED)) ==
+             (ECF_INUSE|ECF_CONNECTED) )
+            rc = EVTCHNSTAT_connected;
+        else if ( lchn[lid].flags & ECF_INUSE )
+            rc = EVTCHNSTAT_disconnected;
+    }
+
+    spin_unlock(&lp->event_channel_lock);
+    return rc;
+}
+
+
+/*
+ * do_event_channel_op(cmd, id): hypercall entry point for event-channel
+ * operations. <id> is the target domain for EVTCHNOP_open, and a local
+ * channel index for the other operations. Returns the handler's result,
+ * or -ENOSYS for an unknown <cmd>.
+ */
+long do_event_channel_op(unsigned int cmd, unsigned int id)
+{
+    u16 chn = (u16)id;
+
+    switch ( cmd )
+    {
+    case EVTCHNOP_open:   return event_channel_open(chn);
+    case EVTCHNOP_close:  return event_channel_close(chn);
+    case EVTCHNOP_send:   return event_channel_send(chn);
+    case EVTCHNOP_status: return event_channel_status(chn);
+    default:              return -ENOSYS;
+    }
+}
+
+
+/*
+ * destroy_event_channels(p): tear down all of @p's channels when the
+ * domain is released, closing each slot so remote ends see a disconnect.
+ *
+ * NOTE(review): event_channel_close() operates on 'current', not on a
+ * passed-in task. When release_task(p) runs in another domain's context
+ * this loop closes the *caller's* channels, not @p's, while @p's array is
+ * freed below with its remote ends never notified — confirm, and consider
+ * a close-by-task variant.
+ */
+void destroy_event_channels(struct task_struct *p)
+{
+    int i;
+    if ( p->event_channel != NULL )
+    {
+        for ( i = 0; i < p->max_event_channel; i++ )
+            (void)event_channel_close((u16)i);
+        kfree(p->event_channel);
+    }
+}
index 5bc9bf6b11d92d5973aeaeef1e9d97aeadf02c2b..5bd13dba9b599e2034f0b72897db5bd61eef9c19 100644 (file)
@@ -60,6 +60,7 @@
 #define __HYPERVISOR_multicall            17
 #define __HYPERVISOR_kbd_op               18
 #define __HYPERVISOR_update_va_mapping    19
+#define __HYPERVISOR_event_channel_op     20
 
 /* And the trap vector is... */
 #define TRAP_INSTR "int $0x82"
@@ -91,6 +92,7 @@
 #define EVENT_NET      0x10 /* There are packets for transmission. */
 #define EVENT_PS2      0x20 /* PS/2 keyboard or mouse event(s) */
 #define EVENT_STOP     0x40 /* Prepare for stopping and possible pickling */
+#define EVENT_EVTCHN   0x80 /* Event pending on an event channel */
 
 /* Bit offsets, as opposed to the above masks. */
 #define _EVENT_BLKDEV   0
 #define _EVENT_NET      4
 #define _EVENT_PS2      5
 #define _EVENT_STOP     6
+#define _EVENT_EVTCHN   7
 
 /*
  * Virtual addresses beyond this are not modifiable by guest OSes. The 
 #define SCHEDOP_exit            1
 #define SCHEDOP_stop            2
 
+/*
+ * EVTCHNOP_* - Event channel operations.
+ */
+#define EVTCHNOP_open           0  /* Open channel to <target domain>.    */
+#define EVTCHNOP_close          1  /* Close <channel id>.                 */
+#define EVTCHNOP_send           2  /* Send event on <channel id>.         */
+#define EVTCHNOP_status         3  /* Get status of <channel id>.         */
+
+/*
+ * EVTCHNSTAT_* - Non-error return values from EVTCHNOP_status.
+ */
+#define EVTCHNSTAT_closed       0  /* Channel is not in use.              */
+#define EVTCHNSTAT_disconnected 1  /* Channel is not connected to remote. */
+#define EVTCHNSTAT_connected    2  /* Channel is connected to remote.     */
+
 
 #ifndef __ASSEMBLY__
 
@@ -237,6 +254,30 @@ typedef struct shared_info_st {
      */
     unsigned long events_mask;
 
+    /*
+     * A domain can have up to 1024 bidirectional event channels to/from other
+     * domains. Domains must agree out-of-band to set up a connection, and then
+     * each must explicitly request a connection to the other. When both have
+     * made the request the channel is fully allocated and set up.
+     * 
+     * An event channel is a single sticky 'bit' of information. Setting the
+     * sticky bit also causes an upcall into the target domain. In this way
+     * events can be seen as an IPI [Inter-Process(or) Interrupt].
+     * 
+     * A guest can see which of its event channels are pending by reading the
+     * 'event_channel_pend' bitfield. To avoid a linear scan of the entire
+     * bitfield there is a 'selector' which indicates which words in the
+     * bitfield contain at least one set bit.
+     * 
+     * There is a similar bitfield to indicate which event channels have been
+     * disconnected by the remote end. There is also a 'selector' for this
+     * field.
+     */
+    u32 event_channel_pend[32];
+    u32 event_channel_pend_sel;
+    u32 event_channel_disc[32];
+    u32 event_channel_disc_sel;
+
     /*
      * Time: The following abstractions are exposed: System Time, Clock Time,
      * Domain Virtual Time. Domains can access Cycle counter time directly.
index fdb9fed24d0539f00a50d64b36b7501be31a7b57..c733dc46fb418743ede94cfd423f46f5a70bd0de 100644 (file)
@@ -28,7 +28,8 @@
  */
 static inline unsigned long mark_guest_event(struct task_struct *p, int event)
 {
-    set_bit(event, &p->shared_info->events);
+    if ( test_and_set_bit(event, &p->shared_info->events) )
+        return 0;
 
     /*
      * No need for the runqueue_lock! The check below does not race
@@ -46,7 +47,8 @@ static inline unsigned long mark_guest_event(struct task_struct *p, int event)
 /* As above, but hyp_events are handled within the hypervisor. */
 static inline unsigned long mark_hyp_event(struct task_struct *p, int event)
 {
-    set_bit(event, &p->hyp_events);
+    if ( test_and_set_bit(event, &p->hyp_events) )
+        return 0;
     smp_mb();
     if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
     reschedule(p);
@@ -64,17 +66,21 @@ static inline void guest_event_notify(unsigned long cpu_mask)
 
 static inline unsigned long mark_guest_event(struct task_struct *p, int event)
 {
-    set_bit(event, &p->shared_info->events);
-    if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
-    reschedule(p);
+    if ( !test_and_set_bit(event, &p->shared_info->events) )
+    {
+        if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
+        reschedule(p);
+    }
     return 0;
 }
 
 static inline unsigned long mark_hyp_event(struct task_struct *p, int event)
 {
-    set_bit(event, &p->hyp_events);
-    if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
-    reschedule(p);
+    if ( !test_and_set_bit(event, &p->hyp_events) )
+    {
+        if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
+        reschedule(p);
+    }
     return 0;
 }
 
index 6d0f6bf6fd6c20d583919c285dad738a498f487c..d565583d6ad8e96674e85961db6845b6fcba4eb4 100644 (file)
@@ -78,7 +78,7 @@ typedef struct pfn_info {
 #define page_type_count(p)      ((p)->type_count)
 #define set_page_type_count(p,v) ((p)->type_count = v)
 
-#define PG_domain_mask 0x00ffffff /* owning domain (24 bits) */
+#define PG_domain_mask MAX_DOMAIN_ID /* owning domain (16 bits) */
 /* hypervisor flags (domain == 0) */
 #define PG_slab               24
 /* domain flags (domain != 0) */
index 812336e725ef8ed2895a596c3b39137e1a79f36c..d4caca2cd215c4405142f0a313104913e7253ced 100644 (file)
@@ -43,6 +43,18 @@ extern struct mm_struct init_mm;
 /* SMH: replace below when have explicit 'priv' flag or bitmask */
 #define IS_PRIV(_p) ((_p)->domain == 0) 
 
+#define DOMAIN_ID_BITS (16)
+#define MAX_DOMAIN_ID  ((1<<(DOMAIN_ID_BITS))-1)
+
+typedef struct event_channel_st
+{
+    u16 target_dom; /* Target domain (i.e. domain at remote end). */
+#define ECF_TARGET_ID ((1<<10)-1) /* Channel identifier at remote end.    */
+#define ECF_INUSE     (1<<10)     /* Is this channel descriptor in use?   */
+#define ECF_CONNECTED (1<<11)     /* Is this channel connected to remote? */
+    u16 flags;
+} event_channel_t;
+
 struct task_struct 
 {
     /*
@@ -129,6 +141,11 @@ struct task_struct
     struct thread_struct thread;
     struct task_struct *prev_task, *next_task, *next_hash;
     
+    /* Event channel information. */
+    event_channel_t *event_channel;
+    unsigned int     max_event_channel;
+    spinlock_t       event_channel_lock;
+
     unsigned long flags;
 
     atomic_t refcnt;